package com.hngy.scala

import org.apache.spark.{SparkConf, SparkContext}

/**
  * Demo: configuring the parallelism of a Spark job.
  *
  * Parallelism can be set in two ways:
  *  1. per RDD, through the second argument of methods such as `textFile` or `parallelize`;
  *  2. globally, through the `spark.default.parallelism` configuration property.
  *
  * This example uses the second approach.
  */
object MoreParallelismScala {

  /**
    * Runs a small word count over an in-memory collection and prints each
    * `(word, count)` pair.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("MoreParallelismScala")
      .setMaster("local")
      // Set the default parallelism globally; `parallelize` below passes no
      // explicit partition count, so it relies on this setting.
      .set("spark.default.parallelism", "5")

    val sc = new SparkContext(conf)
    try {
      val dataRDD = sc.parallelize(Array("hello", "you", "hello", "me", "hehe"))
      dataRDD
        .map(word => (word, 1))
        .reduceByKey(_ + _)
        .foreach(println(_))
    } finally {
      // Always release the context, even when the job above fails.
      sc.stop()
    }
  }
}
